############################################################################
# 
# Incapsulates package interaction
#
# Copyright (c) 2013-14, Alexander Galanin <al@galanin.nnov.ru>
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Alexander Galanin nor the names of its contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL Alexander Galanin BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
############################################################################

package require Tcl 8.5
package require struct::set
package require cmdline
package require logger

package require glob_matcher
package require httplib
package require cache

package provide mediator 1.0

namespace eval mediator {
::logger::init [namespace current]
::logger::import -force -all -namespace log [namespace current]

namespace export run
namespace ensemble create

namespace import \
    ::cmdline::getopt

variable matcher
variable dataPattern
variable itemPattern
variable itemCommand
variable moreInfoCommand
variable generatorCommand
variable requiredFields
# minimum time for item added to the feed (UNIX time)
variable minTime

# Match string to dataPattern using specified matcher
proc match {string} {
    variable matcher
    variable dataPattern

    switch $matcher {
        glob {
            return [::glob_matcher::match $dataPattern $string]
        }
        regexp {
            return [regexp -inline -- $dataPattern $string]
        }
        default {
            error "unknown matcher `$matcher'"
        }
    }
}

# Find all matches in string and pass it to specified command
# @param string data to parse
proc matchList {string} {
    variable matcher
    variable itemPattern

    switch $matcher {
        glob {
            ::glob_matcher::match \
                -all \
                -command [namespace current]::processItem \
                $itemPattern $string
        }
        regexp {
            # number of matches per item (w/o whole match)
            set count [lindex [regexp -about $itemPattern] 0]
            # perform matching
            set matches [regexp -all -inline -- $itemPattern $string]
            # call command for all matches
            set resCount 0
            for {set i 0} {$i < [llength $matches]} {incr i $count;incr i} {
                processItem {*}[lrange $matches $i $i+$count]
                incr resCount
            }
            return $resCount
        }
        default {
            error "unknown matcher `$matcher'"
        }
    }
}

# Get information from string containing list of items. At first, dataPattern
# is matched against a string. At second, matched substring parsed using
# itemPattern and itemCommand is called for each item.
# @param string data to parse
# @return matches count
proc parseListPage {string} {
    # get data block
    set res [match $string]
    switch [llength $res] {
        0 {
            error "no data matched" {} MATCH
        }
        2 {
        }
        default {
            error "more than one match returned" {} MATCH
        }
    }
    set string [lindex $res 1]
    log::debug "matched page fragment: $string"
    # parse list of items
    matchList $string
}

# Download web page and invoke parser on it to generate RSS feed.
# User functions may use break (3) or continue (4) codes to tell that this
# item should be skipped.
# @param url page URL
# @param listDataPattern pattern for page fragment with list of items
# @param listItemPattern pattern for list item
# @param command command that takes all matches and produces key-value
# pairs for generator. Appended arguments: whole matched substring, match1,
# match2, ... matchN.
# @param genCommand generator's addItem command
# @param reqFields field names required to be passed to generator
# @param -info command that takes key-value pairs from 'itemCommand' and
# returns more information about feed item
# @param -mintime minimum time for item added to the feed (UNIX time)
# @param -matcher string matcher (glob (default) or regexp)
# @return number of items parsed from page
proc run {url listDataPattern listItemPattern command genCommand reqFields
        args} {
    variable dataPattern $listDataPattern
    variable itemPattern $listItemPattern
    variable itemCommand $command
    variable generatorCommand $genCommand
    variable requiredFields $reqFields

    parseArgs {*}$args

    log::info "parsing $url"
    set data [httplib get $url]
    log::debug "page data: $data"
    if {[catch {parseListPage $data} res opts]} {
        if {[lindex [dict get $opts -errorcode] 0] eq "MATCH"} {
            return -options $opts "$url: $res"
        } else {
            return -options $opts $res
        }
    }
    log::info "$res items fetched from $url"
    return $res
}

# Parse optional arguments for [run] and assign namespace variables
proc parseArgs {args} {
    variable matcher glob
    variable moreInfoCommand
    variable minTime

    set info {dict create}
    set mintime 0

    set usage "run url pattern pattern cmd cmd set ?-info cmd? ?-mintime s? ?-matcher type?"
    while {[set ret [getopt args {info.arg mintime.arg matcher.arg} key value]] == 1} {
        set $key $value
    }
    if {$ret == -1} {
        error "$value. usage: $usage"
    } elseif {[llength $args] != 0} {
        error "usage: $usage"
    }
    set moreInfoCommand $info
    set minTime $mintime
}

# Invoke itemCommand on a match. If there are required arguments missed from
# result, try to get them from cache and finally pipe result through
# moreInfoCommand and pass results into generatorCommand.
# User functions may use break (3) or continue (4) codes to tell that this
# item should be skipped.
proc processItem {args} {
    variable itemCommand
    variable moreInfoCommand
    variable generatorCommand
    variable requiredFields
    variable minTime

    log::debug "item match: [lindex $args 0]"
    set i 1
    foreach match [lrange $args 1 end] {
        log::debug "item submatch $i: $match"
        incr i
    }

    if {![invokeUserScript $itemCommand $args vars]} {
        return
    }
    set diff [struct::set difference $requiredFields [dict keys $vars]]
    # if not all required vars are set
    if {[struct::set size $diff] > 0} {
        # 'link' field may be absent from initially generated set
        if {[dict exists $vars link]} {
            set url [dict get $vars link]
            if {[cache exists $url $diff]} {
                log::info "from cache: $url"
                set more [cache get $url]
                set call false
            } else {
                log::info "calling user script: $url"
                set call true
            }
        } else {
            log::info "calling user script"
            set call true
        }
        # if not all required fields are set, call moreInfoCommand to retrieve
        # missing field values
        if {$call} {
            if {![invokeUserScript $moreInfoCommand $vars more]} {
                return
            }
            set diff [struct::set difference $diff [dict keys $more]]
            if {[struct::set size $diff] != 0} {
                error "the following feed item fields are not set by user script: $diff"
            }
        }
        set vars [dict merge \
            $vars \
            $more \
        ]
        # link is a required var
        cache keep [dict get $vars link] {*}$more
    }
    # finally pass items to feed generator if time matches
    if {[dict get $vars date] >= $minTime} {
        uplevel #0 [concat $generatorCommand $vars]
    }
}

# Invoke command with appended list of arguments 'arg' and place result into
# variable named 'resultVar'. If an error occured, throw error with
# USERSCRIPT code.
# @param command command to execute
# @param arg list of arguments for command
# @param resultVar name of variable for result
# @return true if no break or continue code passed from the user script
proc invokeUserScript {command arg resultVar} {
    upvar $resultVar res

    switch [catch {uplevel #0 [concat $command $arg]} res opts] {
        0 {
            return true
        }
        1 {
            return \
                -code 1 \
                -options [list -oldoptions $opts -olderror $res] \
                -errorcode USERSCRIPT \
                -errorinfo [dict get $opts -errorinfo] \
                "user script error ($command): $res"
        }
        default {
            # break execution on 'break' or 'continue'
            log::info "break from user script"
            return false
        }
    }
}

}
